home *** CD-ROM | disk | FTP | other *** search
- // WaisDocument.m
- //
- // Free software created 1 Feb 1992
- // by Paul Burchard <burchard@math.utah.edu>.
- // Incorporating:
- /*
- WIDE AREA INFORMATION SERVER SOFTWARE:
- No guarantees or restrictions. See the readme file for the full standard
- disclaimer.
-
- This is part of the [NeXTstep] user-interface for the WAIS software.
- Do with it as you please.
-
- Version 0.82
- Wed Apr 24 1991
-
- jonathan@Think.COM
-
- */
- //
-
- #import "WaisDocument.h"
-
// Search path for documents: the folder list shared by every WaisDocument.
// Stays nil until +setFolderList: installs a list.
static id documentFolderList = nil;

// Title passed to ErrorMsg() for every error this class reports.
static char *errorTitle = "WAIS Document Error!";
-
- // Decoders for WAIS structured files.
-
// Decoder table for the ":source-id" structure: one ":filename" string
// field.  The table is terminated by an entry whose name is NULL.
// NOTE(review): the meaning of the positional slots (two zero placeholders,
// read routine + count, write routine + count, size limit) is inferred from
// layout only -- confirm against the _WaisDecoder declaration.
_WaisDecoder waisSourceIDDecoder[] =
{
    {
        ":filename", W_FIELD, 0, 0,
        ReadString, 3,
        WriteString, 2,
        MAX_SYMBOL_SIZE
    },
    { NULL }
};
-
// Decoder table for the ":document" structure.
//
// Numeric fields use ReadLongS/WriteLongS, text fields use
// ReadString/WriteString bounded by MAX_SYMBOL_SIZE.  Two entries are nested
// structures: ":source" (a ":source-id" decoded by waisSourceIDDecoder) and
// ":doc-id", which has no sub-decoder because -readWaisStruct:... and
// -writeWaisStruct:... handle that structure themselves.
// The table is terminated by an entry whose name is NULL.
_WaisDecoder waisDocumentDecoder[] =
{
    // Size information.
    { ":number-of-lines",      W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { ":number-of-bytes",      W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { ":number-of-characters", W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { ":best-line",            W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },

    // Descriptive strings.
    { ":date",     W_FIELD, 0, 0, ReadString, 3, WriteString, 2,
        MAX_SYMBOL_SIZE },
    { ":headline", W_FIELD, 0, 0, ReadString, 3, WriteString, 2,
        MAX_SYMBOL_SIZE },
    { ":type",     W_FIELD, 0, 0, ReadString, 3, WriteString, 2,
        MAX_SYMBOL_SIZE },

    // Nested structures.
    { ":source", W_STRUCT, ":source-id", waisSourceIDDecoder },
    { ":doc-id", W_STRUCT, ":doc-id",    NULL /*special case*/ },

    { NULL }
};
-
// Decoder table for the ":fragment" structure (used for both the ":start"
// and ":end" elements of a document id): three numeric position fields.
// The table is terminated by an entry whose name is NULL.
_WaisDecoder waisFragmentDecoder[] =
{
    { ":para-id",  W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { ":line-pos", W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { ":byte-pos", W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { NULL }
};
-
// Top-level decoder table, returned by +fileStructDecoder for the
// ":document-id" structure: a numeric ":score", the nested ":document"
// record, and the ":start" / ":end" fragments (both ":fragment" structures
// sharing waisFragmentDecoder).
// The table is terminated by an entry whose name is NULL.
_WaisDecoder waisDocumentIDDecoder[] =
{
    { ":score",    W_FIELD, 0, 0, ReadLongS, 2, WriteLongS, 2 },
    { ":document", W_STRUCT, ":document", waisDocumentDecoder },
    { ":start",    W_STRUCT, ":fragment", waisFragmentDecoder },
    { ":end",      W_STRUCT, ":fragment", waisFragmentDecoder },
    { NULL }
};
-
-
- @implementation WaisDocument
-
// Returns the folder list shared by all WaisDocuments (nil if none is set).
+ folderList
{
    id currentList = documentFolderList;

    return currentList;
}
-
// Installs aList as the folder list shared by all WaisDocuments and frees
// the list it replaces.  Returns the class object.
+ setFolderList:aList
{
    // Re-installing the list that is already current must not free it;
    // otherwise the static would be left pointing at a freed object.
    if(aList == documentFolderList) return self;

    if(documentFolderList) [documentFolderList free];
    documentFolderList = aList;
    return self;
}
-
// Returns the built-in default folder path for WAIS documents.
+ (const char *)defaultHomeFolder
{
    static const char *const homeFolder = "/Library/WAIS/documents";

    return homeFolder;
}
-
// Returns the name of the top-level structure in a document's WAIS file.
+ (const char *)fileStructName
{
    static const char *const structName = ":document-id";

    return structName;
}
-
// Returns the decoder table for the top-level ":document-id" structure.
+ (WaisDecoder)fileStructDecoder
{
    WaisDecoder topLevelDecoder = waisDocumentIDDecoder;

    return topLevelDecoder;
}
-
// Returns the panel title this class uses when reporting errors.
+ (const char *)errorTitle
{
    const char *title = errorTitle;

    return title;
}
-
// Returns YES when fileName names a WAIS document description file, i.e.
// it is non-NULL, ends with W_D_EXT, and has at least one character before
// that extension.  (We read in the .wais file corresponding to the doc's
// content file.)
+ (BOOL)checkFileName:(const char *)fileName
{
    size_t nameLen, extLen;

    if(!fileName) return NO;
    nameLen = strlen(fileName);
    extLen = strlen(W_D_EXT);
    if(nameLen <= extLen) return NO;

    // Compare the tail of the name directly.  Searching with strstr() finds
    // the FIRST occurrence of the extension, which wrongly rejects names
    // that also contain it earlier (e.g. in a folder name).
    if(0 != strcmp(fileName + (nameLen - extLen), W_D_EXT)) return NO;
    return YES;
}
-
// Releases the document's WAIS doc-id (if any) with s_free(), then lets
// the superclass free the object.
- free
{
    if(waisDocID != 0)
    {
        s_free(waisDocID);
    }
    return [super free];
}
-
// Looks up an object by key after removing any ".wais" extension.
//
// A NULL key, or one that does not contain W_D_EXT, is passed straight to
// the superclass.  Otherwise a private copy of the key is truncated at the
// first occurrence of W_D_EXT and that shortened key is looked up.
// Returns the object found, or nil (also nil if the copy can't be made).
+ objectForCompleteKey:(const char *)aKey
{
    char *stripped, *ext;
    id match;

    if(aKey == NULL || strstr(aKey, W_D_EXT) == NULL)
        return [super objectForCompleteKey:aKey];

    stripped = s_malloc(strlen(aKey)+1);
    if(stripped == NULL) return nil;
    strcpy(stripped, aKey);

    ext = strstr(stripped, W_D_EXT);
    if(ext == NULL)
    {
        s_free(stripped);
        return nil;
    }
    *ext = 0;

    match = [super objectForCompleteKey:stripped];
    s_free(stripped);
    return match;
}
-
// Sets the document's key after removing any ".wais" extension.
//
// A NULL key, or one that does not contain W_D_EXT, is passed straight to
// the superclass.  Otherwise a private copy of the key is truncated at the
// first occurrence of W_D_EXT and the shortened key is installed.
// Returns whatever the superclass returns, or nil if the copy can't be made.
- setKey:(const char *)aKey
{
    char *buf, *endp;
    id rtn;

    // First remove any ".wais" extension from keys.
    if(!aKey || !strstr(aKey, W_D_EXT))
        return [super setKey:aKey];
    if(!(buf = s_malloc(strlen(aKey)+1))) return nil;
    strcpy(buf, aKey);

    // The copy must be released on this error path too.
    if(!(endp = strstr(buf, W_D_EXT))) { s_free(buf); return nil; }
    *endp = 0;
    rtn = [super setKey:buf];
    s_free(buf);
    return rtn;
}
-
// Builds the document's key (a local file path) from its info fields and
// installs it with -setKey:.  Returns self, or nil if the work buffer
// can't be allocated or the chosen folder is not an absolute path.
- setKeyFromInfo
{
    char *buf, *p, *hname, *extn;
    const char *head, *jump, *src, *src_end, *src_ext;
    const char *headline, *theType;
    const char **foldp, *fold;
    int len, extcnt, non_blank;

    // HACKING WAIS HEADLINES INTO FILE NAMES AND TYPES.
    //
    //!!! This is a hack.  WAIS servers should make better use of TYPE field.
    //
    // Uses the ":headline", ":type", and [source] ":filename" info fields,
    // information in the fromSource member.
    //
    // The "key" member is set to the headline altered with this recipe:
    //  1. Everything between the first and last '/' inclusive, is removed
    //     and replaced by a space.  If the first '/' was directly
    //     preceded by a blank, but the initial portion is not totally
    //     blank, all following text is removed as well.
    //  2. Trailing & leading blanks of the new string are removed,
    //     sequences of blanks are compressed into single space chars,
    //     and non-ascii chars are replaced by '?'.
    //  3. IF the string is now empty it is replaced by "?".
    //  4. ELSE IF the new string ends with a file extension
    //     ("[.][A-Za-z0-9]+"), then the extension is lowercased.
    //  5. If the waisType input is non-NULL and different from "TEXT",
    //     then the extension is changed to reflect the waisType.
    //  6. As long as the document is not a WAIS source itself (according to
    //     ":type" field), the source's ":filename", excluding the ".src"
    //     extension but followed by a ':', is prepended.  Or, if the
    //     ":filename" info field is blank, our ":filename" field is
    //     tried (this would come from a source), or lastly the
    //     final component of the source's "key" member is used instead.
    //  7. The default folder for WaisDocuments is prepended (with
    //     separating '/' if necessary), unless the document is a WAIS
    //     source, in which case the default folder for WaisSources is
    //     prepended instead.
    //
    headline = [self valueForStringKey:":headline"];
    theType = [self valueForStringKey:":type"];

    // Pick the destination folder: first entry of the relevant folder list.
    if(theType && 0==strcmp(theType, "WSRC"))
        foldp = (const char **)[[WaisSource folderList] elementAt:0];
    else foldp = (const char **)[[WaisDocument folderList] elementAt:0];
    if(foldp && *foldp) fold = *foldp;
    else fold = "/";

    // Pick the source name, keeping only its final path component.
    if(!(fromSource && (src=[fromSource valueForStringKey:":filename"]))
        && !(src = [self valueForStringKey:":filename"])
        && !(fromSource && (src=[fromSource key])))
        src = "?";
    if(strrchr(src, '/')) { src = strrchr(src, '/'); src++; }

    // Size the work buffer.  The name part is never longer than the
    // headline, but an empty or all-blank headline still yields "?", so
    // budget at least one byte for it.  "/:.tiff" covers the folder
    // separator, the ':' after the source name and the longest extension
    // added below; the final +1 is the terminating NUL.
    if(headline) len = strlen(headline);
    else len = strlen("?");
    if(len < 1) len = 1;
    if(!(buf = s_malloc(strlen(fold) + strlen(src) + len
        + strlen("/:.tiff") + 1)))
        return nil;

    // Folder, which must be absolute, followed by exactly one '/'.
    strcpy(buf, fold);
    p = buf + strlen(buf);
    if(buf[0] != '/') { s_free(buf); return nil; }
    if(*(p-1) != '/') *p++ = '/';

    // Source name without a trailing ".src", then ':' (skipped for sources).
    src_end = src + strlen(src);
    if(strlen(src)<4 || 0!=strcmp((src_ext=src_end-4), ".src"))
        src_ext = src_end;
    if(!theType || 0!=strcmp(theType, "WSRC"))
        { for(; src<src_ext; src++) *p++ = *src; *p++ = ':'; }
    *(hname=p) = 0;

    // Copy the headline, skipping leading blanks, collapsing blank runs
    // and cutting out everything from the first to the last '/'.
    if(headline) head = headline;
    else head = "?";
    for(; isascii(*head) && isspace(*head); head++);
    for(non_blank=0; *head; head++)
        if(!(isascii(*head) && isspace(*head)
            && isascii(*(p-1)) && isspace(*(p-1))))
        {
            if(*head == '/')
            {
                // "text /path": drop the path and everything after it.
                if(non_blank && isascii(*(p-1)) && isspace(*(p-1))) break;
                else
                {
                    // Skip ahead to the last '/' and leave one space.
                    if(!(jump = strrchr(head, '/'))) jump = head;
                    head = jump;
                    *p++ = ' ';
                    continue;
                }
            }
            if(isascii(*head) && isspace(*head)) *p++ = ' ';
            else { *p++ = *head; non_blank = 1; }
        }

    // Trim trailing blanks, terminate, and neutralize non-ASCII bytes.
    for(p--; p>=hname && isascii(*p) && isspace(*p); p--);
    *++p = 0;
    for(p=hname; *p; p++) if(!isascii(*p)) *p = '?';

    // Empty name becomes "?"; otherwise find and lowercase any extension.
    extn = 0;
    if(p == hname) { *p++ = '?'; *p = 0; }
    else
    {
        p = hname + strlen(hname);
        for(extcnt=0, p--; p>hname && isalnum(*p); p--) extcnt++;
        if(*p=='.' && extcnt>0)
            { for(extn=p, p++; *p; p++) if(isupper(*p)) *p = tolower(*p); }
    }

    // Force the extension for the document types we know about.
    if(theType && 0!=strcmp(theType, "TEXT"))
    {
        if(0 == strcmp(theType, "WSRC"))
            { if(extn) strcpy(extn, ".src"); else strcat(buf, ".src"); }
        else if(0 == strcmp(theType, "TIFF"))
            { if(extn) strcpy(extn, ".tiff"); else strcat(buf, ".tiff"); }
        else if(0 == strcmp(theType, "GIF"))
            { if(extn) strcpy(extn, ".gif"); else strcat(buf, ".gif"); }
    }

    [self setKey:buf];
    s_free(buf);
    return self;
}
-
// Returns the source object this document is associated with (may be nil).
- fromSource
{
    return self->fromSource;
}
-
// Records aSource as this document's source and marks the document as not
// retrieved.  The document's own ":filename" field is then set from the
// source's ":filename" field, or failing that from the last path component
// of the source's key.  Returns self.
- setFromSource:aSource
{
    const char *name, *slash;

    fromSource = aSource;
    isRetrieved = NO;
    if(!fromSource) return self;

    // Preferred: the source's own ":filename" field.
    name = [fromSource valueForStringKey:":filename"];
    if(name)
    {
        [self insertStringKey:":filename" value:name];
        return self;
    }

    // Fallback: whatever follows the last '/' in the source's key.
    name = [fromSource key];
    if(name)
    {
        slash = strrchr(name, '/');
        if(slash) name = slash + 1;
        [self insertStringKey:":filename" value:name];
    }
    return self;
}
-
// Returns the document's WAIS doc-id (may be NULL).  The document keeps
// ownership: it s_free()s the id when replaced or when the object is freed.
- (DocID *)waisDocID
{
    return self->waisDocID;
}
-
// Replaces the document's WAIS doc-id, releasing the previous one.
// theDocID must be s_free()-able.  Returns self.
- setWaisDocID:(DocID *)theDocID
{
    // Setting the id we already hold must not free it; otherwise we would
    // keep a pointer to freed memory.
    if(theDocID == waisDocID) return self;

    if(waisDocID) s_free(waisDocID);
    waisDocID = theDocID;
    return self;
}
-
// Replaces the document's WAIS doc-id with one built from docAny and marks
// the document as not retrieved.
//
// docIDFromAny() is tried first; if it yields nothing, a fresh DocID is
// allocated whose originalLocalID is a copy of docAny.
// Returns self, or nil if that allocation fails (the doc-id is then NULL).
// NOTE(review): the remaining members of the fallback DocID are left as
// s_malloc() returns them -- confirm s_malloc() zero-fills.
- setWaisDocIDFromAny:(any *)docAny
{
    isRetrieved = NO;
    if(waisDocID) s_free(waisDocID);
    waisDocID = docIDFromAny(docAny);
    if(!waisDocID)
    {
        // Check the allocation like every other s_malloc() call in this
        // file instead of writing through a possibly NULL pointer.
        if(!(waisDocID = (DocID *)s_malloc(sizeof(DocID)))) return nil;
        waisDocID->originalLocalID = copy_any(docAny);
    }
    return self;
}
-
// Returns YES when the document is currently marked as retrieved.
- (BOOL)isRetrieved
{
    return self->isRetrieved;
}
-
// Clears the retrieved mark.  Returns self.
- setUnretrieved
{
    self->isRetrieved = NO;
    return self;
}
-
// Error/exit helper for -retrieve: closes file while holding the file-I/O
// lock, s_free()s ptr when it is non-NULL, and always returns nil.
- cleanUpClose:(FILE *)file free:(any *)ptr
{
    [Wais lockFileIO];
    fclose(file);
    [Wais unlockFileIO];

    if(ptr != NULL)
    {
        s_free(ptr);
    }
    return nil;
}
-
// Fetches the document's contents from its source and writes them to the
// local file named by key.
//
// The document is requested one "page" of bytes at a time; each page is
// appended to the file (after clean-up for "TEXT" documents).  On success
// the document is marked retrieved and self is returned.  On any failure
// an error is reported through ErrorMsg() (except when the source cannot
// be connected), the file is closed, and nil is returned with the document
// left marked as not retrieved.
//
// NOTE(review): nothing in this method releases the decoded response
// (interp_response) after each page -- confirm whether that is a leak.
// NOTE(review): the request buffer size handed to generate_retrieval_apdu()
// is [fromSource bufferLength], but the message length is then computed
// from MAX_MESSAGE_LEN -- confirm the two are always equal.
- retrieve
{
    int i;
    long lines, count, chars, length;
    long request_length, chars_per_page;
    const char *value, *database, *wType;
    static char request[MAX_MESSAGE_LEN], response[MAX_MESSAGE_LEN];
    FILE *file;
    any* docany;
    WAISDocumentText *data;
    SearchResponseAPDU *interp_response;
    WAISSearchResponse *wais_response;
    diagnosticRecord **diag;
    extern char *delete_seeker_codes();/* in ui.c, but not declared in ui.h */

    // Set up source for retrieval.
    isRetrieved = NO;
    [fromSource setConnected:YES];
    if(![fromSource isConnected]) return nil;

    // Open local document file to receive retrieved data.
    [Wais lockFileIO];
    if(!key || !(file = fopen(key, "w")))
    {
        [Wais unlockFileIO];
        ErrorMsg(errorTitle, "Can't create local document file %s.",
            key ? key : "???");
        return nil;
    }
    [Wais unlockFileIO];

    // parameters for "page-by-page" retrieval loop.
    if((value = [self valueForStringKey:":number-of-lines"]))
        lines = atol(value);
    else lines = 0;
    if((value = [self valueForStringKey:":number-of-bytes"]))
        chars = atol(value);
    else if((value = [self valueForStringKey:":number-of-characters"]))
        chars = atol(value);
    else chars = 0;
    chars_per_page = [fromSource bufferLength]-HEADER_LENGTH-1000;/*paranoia?*/
    docany = anyFromDocID(waisDocID);
    database = [fromSource valueForStringKey:":database-name"];
    wType = [self valueForStringKey:":type"];
    if(!wType) wType = "TEXT";
    if(lines<=0 && chars<=0)
    {
        [self cleanUpClose:file free:docany];
        ErrorMsg(errorTitle, "Document %s is empty.", key);
        return nil;
    }

    // Retrieve one page at a time and write to local doc file.
    for(count=0; count*chars_per_page<chars; count++)
    {
        // Lock transaction to prevent conflict with port.
        [Wais lockTransaction];

        // Create retrieval request message.
        request_length = [fromSource bufferLength];
        if(!generate_retrieval_apdu(request + HEADER_LENGTH,
            &request_length, docany, CT_byte, count * chars_per_page,
            MIN((count + 1) * chars_per_page, chars), wType, database))
        {
            [Wais unlockTransaction]; [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle, "Overflow: retrieval request too large for %s.", key);
            return nil;
        }

        // Send retrieval message.
        if(!interpret_message(request, MAX_MESSAGE_LEN - request_length,
            response, MAX_MESSAGE_LEN, [fromSource connection], false))
        {
            [Wais unlockTransaction]; [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle,"Warning: missing data for document %s.",key);
            return nil;
        }

        // Interpret received reply message.
        // Transaction is done; unlock.
        // Start from NULL so a decoder that stores nothing is detectable.
        interp_response = NULL;
        readSearchResponseAPDU(&interp_response, response + HEADER_LENGTH);
        [Wais unlockTransaction];

        // The WAIS-specific part of the reply travels in the APDU's
        // DatabaseDiagnosticRecords member.
        wais_response = NULL;
        if(interp_response)
            wais_response = (WAISSearchResponse *)
                interp_response->DatabaseDiagnosticRecords;

        // Report any diagnostics the server attached.
        if(wais_response && (diag = wais_response->Diagnostics))
            for(i=0; diag[i]; i++) if(diag[i]->ADDINFO)
                ErrorMsg(errorTitle, "Retrieval diagnostics: %s, %s",
                    diag[i]->DIAG, diag[i]->ADDINFO);

        // Extract document data chunk from response.
        // Every link in the chain is checked: a missing or undecodable
        // reply must produce the error below, not a NULL dereference.
        if(!wais_response || !wais_response->Text
            || !(data = wais_response->Text[0]) || !data->DocumentText)
        {
            [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle,"Warning: missing data for document %s.",key);
            return nil;
        }

        // If of file type "TEXT", strip out weird stuff.
        // (Note "TEXT" type is ASCII-based, not international.)
        if(0 == strcmp(wType, "TEXT"))
        {
            length = data->DocumentText->size;
            delete_seeker_codes(data->DocumentText->bytes, &length);
            data->DocumentText->size = length;
            replace_controlM(data->DocumentText->bytes, &length);
            data->DocumentText->size = length;
        }

        // Write data chunk to file.
        [Wais lockFileIO];
        if(data->DocumentText->size
            != fwrite(data->DocumentText->bytes, sizeof(char),
                (size_t)data->DocumentText->size, file))
        {
            [Wais unlockFileIO]; [self cleanUpClose:file free:docany];
            ErrorMsg(errorTitle, "Write error on document %s.", key);
            return nil;
        }
        [Wais unlockFileIO];
    }
    [self cleanUpClose:file free:docany];
    [Wais lockTransaction];
    isRetrieved = YES;
    [Wais unlockTransaction];
    return self;
}
-
// Reads one structure of the document's WAIS file.
//
// ":doc-id" structures are read directly with ReadDocID() into a fresh
// DocID, which becomes the document's doc-id unless the read reports FALSE
// or END_OF_STRUCT_OR_LIST.  Everything else goes through the superclass,
// followed by three fix-ups:
//   - for the ":start" / ":end" elements, the position just read
//     (":byte-pos", else ":line-pos", else ":para-id") is stored under the
//     element's own name, because both are fragments and the info fields
//     are kept flat;
//   - after a ":source-id", the source named by ":filename" is looked up
//     and attached (an error is reported if none is attached);
//   - after the top-level structure, a key is derived from the info fields
//     if the document still has none.
// Returns the status of the underlying read.
- (short)readWaisStruct:(const char *)structName
    forElement:(const char *)elementName
    fromFile:(FILE *)file
    withDecoder:(WaisDecoder)theDecoder
{
    short status;
    DocID *newID;
    const char *position, *sourceName;
    BOOL isFragment;

    // Use doc-id shortcut routine.
    if(strcmp(structName, ":doc-id") == 0)
    {
        newID = (DocID *)s_malloc(sizeof(DocID));
        if(newID == NULL) return FALSE;
        status = ReadDocID(newID, file);
        if(status != FALSE && status != END_OF_STRUCT_OR_LIST)
            [self setWaisDocID:newID];
        else
        {
            s_free(newID);
        }
        return status;
    }

    // Standard read.
    status = [super readWaisStruct:structName
        forElement:elementName fromFile:file withDecoder:theDecoder];

    // Keep ":start" and ":end" apart in the flattened info fields.
    isFragment = (strcmp(elementName, ":start") == 0
        || strcmp(elementName, ":end") == 0);
    if(isFragment)
    {
        position = [self valueForStringKey:":byte-pos"];
        if(!position) position = [self valueForStringKey:":line-pos"];
        if(!position) position = [self valueForStringKey:":para-id"];
        if(position) [self insertStringKey:elementName value:position];
    }

    // Find source if necessary.
    if(strcmp(structName, ":source-id") == 0)
    {
        sourceName = [self valueForStringKey:":filename"];
        [self setFromSource:[WaisSource objectForKey:sourceName]];
        if(!fromSource)
            ErrorMsg(errorTitle, "Unknown source %s.",
                [self valueForStringKey:":filename"]);
    }

    // Set key from info, if still NULL even tho full doc record has been read.
    if(!key && strcmp(structName, [WaisDocument fileStructName]) == 0)
    {
        isRetrieved = NO;
        [self setKeyFromInfo];
    }
    return status;
}
-
// Reads the document's description from its ".wais" file.
//
// The key names the content file, so W_D_EXT is temporarily appended to it
// (through super, since our own -setKey: strips the extension) while the
// superclass reads the file; the original key is put back afterwards
// whether or not the read succeeded.  The document is then marked
// retrieved exactly when the content file is readable.
// Returns self, or nil if there is no key, the extended name would not fit
// in a path buffer, or the superclass read fails.
- readWaisFile
{
    NXAtom orig_key;
    char buf[MAXPATHLEN+1];
    id result;

    // We read WAIS specification file rather than content file,
    // so temporarily append ".wais" to key (note call to super
    // since our -setKey: strips the ".wais").
    if(!key) return nil;
    orig_key = key;

    // Refuse names that would overflow buf once the extension is added.
    if(strlen(orig_key) + strlen(W_D_EXT) > MAXPATHLEN)
    {
        ErrorMsg(errorTitle, "File name %s is too long.", orig_key);
        return nil;
    }
    strcpy(buf, orig_key);
    strcat(buf, W_D_EXT);
    [super setKey:buf];
    result = [super readWaisFile];

    // Restore the key before checking the result, so a failed read does
    // not leave the document keyed by its ".wais" file.
    [self setKey:orig_key];
    if(!result) return nil;

    // Mark doc as retrieved if file named by (original) key exists.
    [Wais lockFileIO];
    if(0 == access(key, R_OK)) isRetrieved = YES;
    else isRetrieved = NO;
    [Wais unlockFileIO];
    return self;
}
-
// Writes one structure of the document's WAIS file.
//
// ":doc-id" structures are written directly with WriteDocID() followed by
// a newline; without a doc-id an error is reported and FALSE returned.
// For the ":start" / ":end" elements the value stored under the element's
// name is first copied to ":byte-pos" (and ":line-pos" / ":para-id" are
// set to NULL) so the fragment decoder writes that one position.
// Everything else is delegated to the superclass, whose status is returned.
- (short)writeWaisStruct:(const char *)structName
    forElement:(const char *)elementName
    toFile:(FILE *)file
    withDecoder:(WaisDecoder)theDecoder
{
    BOOL isFragment;

    // Use doc-id shortcut routine.
    if(strcmp(structName, ":doc-id") == 0)
    {
        if(!waisDocID)
        {
            ErrorMsg(errorTitle, "No Doc-ID for %s.", key);
            return FALSE;
        }
        WriteDocID(waisDocID, file);
        WriteNewline(file);
        return TRUE;
    }

    // The info fields are flat, so ":start" and ":end" (both fragments)
    // each load their own value into the shared position fields first.
    isFragment = (strcmp(elementName, ":start") == 0
        || strcmp(elementName, ":end") == 0);
    if(isFragment)
    {
        //!!! note kludge from xwais: we ignore distinctions here!
        [self insertStringKey:":byte-pos"
            value:[self valueForStringKey:elementName]];
        [self insertStringKey:":line-pos" value:NULL];
        [self insertStringKey:":para-id" value:NULL];
    }

    // Standard write.
    return [super writeWaisStruct:structName
        forElement:elementName toFile:file withDecoder:theDecoder];
}
-
// Writes the document's description to its ".wais" file.
//
// A missing or empty ":date" field is first filled in with "0".  The key
// names the content file, so W_D_EXT is temporarily appended to it
// (through super, since our own -setKey: strips the extension) while the
// superclass writes the file; the original key is put back afterwards
// whether or not the write succeeded.
// Returns self, or nil if there is no key, the extended name would not fit
// in a path buffer, or the superclass write fails.
- writeWaisFile
{
    NXAtom orig_key;
    char buf[MAXPATHLEN+1];
    const char *date;
    id result;

    // Fill in missing fields.
    date = [self valueForStringKey:":date"];
    if(!date || strlen(date)==0)
        [self insertStringKey:":date" value:"0"];

    // We write WAIS specification file rather than content file,
    // so temporarily append ".wais" to key (note call to super
    // since our -setKey: strips the ".wais").
    if(!key) return nil;
    orig_key = key;

    // Refuse names that would overflow buf once the extension is added.
    if(strlen(orig_key) + strlen(W_D_EXT) > MAXPATHLEN)
    {
        ErrorMsg(errorTitle, "File name %s is too long.", orig_key);
        return nil;
    }
    strcpy(buf, orig_key);
    strcat(buf, W_D_EXT);
    [super setKey:buf];
    result = [super writeWaisFile];

    // Restore the key before checking the result, so a failed write does
    // not leave the document keyed by its ".wais" file.
    [self setKey:orig_key];
    if(!result) return nil;
    return self;
}
-
- @end
-
-
-
-
-
-
-